# 爬取本地宝新闻内容
import json
import re
import datetime
import bs4
import requests


def getContent(url, timeout=10):
    """Download a page and return the whitespace-separated text of ``#bo``.

    The text of every element matching the CSS selector ``#bo`` is split on
    whitespace; tokens containing "》》" or "北京本地宝首页" are dropped and
    the remaining tokens are returned with a newline after each one.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        The filtered text (an empty string when nothing matches ``#bo``),
        or ``None`` when the server answers with a status other than 200.

    Raises:
        requests.RequestException: Propagated unchanged on connection
            failures or when ``timeout`` expires.
    """
    res = requests.get(
        url=url,
        headers={
            'User-Agent': 'Mozilla/5.0(Macintosh;lnterl Mac OS X 10_14_6) AppleWebKit/537.36(KHTML,like Gecko)Chrome/87.0.4280.88 Safari/537.36'
        },
        # Without a timeout requests waits indefinitely on a silent server.
        timeout=timeout,
    )
    if res.status_code != 200:
        print('无法获取页面')
        return None

    # Decode using the charset detected from the body rather than the one
    # inferred from the response headers (presumably because the site's
    # headers are unreliable -- confirm). Only done once the body is needed.
    res.encoding = res.apparent_encoding
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    skip_markers = ("》》", "北京本地宝首页")
    kept = []
    # '#bo' was obtained with the browser's "Copy selector" action.
    # Collect tokens from every match: previously only the last match was
    # used, and zero matches raised UnboundLocalError.
    for anchor in soup.select('#bo'):
        for token in anchor.text.split():
            if any(marker in token for marker in skip_markers):
                continue
            kept.append(token)
    return "".join(token + "\n" for token in kept)


if __name__ == '__main__':
    # Manual run against a single sample article; the returned text is
    # not used here.
    sample_url = "http://bj.bendibao.com/news/2022627/319683.shtm"
    getContent(url=sample_url)
